library(readr)
# Load the telecom churn data into `tele`.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# script only runs where this exact file location exists; consider a
# project-relative path.
tele <- read_csv("C:\\Users\\Rober\\OneDrive\\Documents\\Uni Stuff\\NOTEBOOKS\\Class\\telecom.csv")

-- Column specification ---------------------------------------------------------------------------------------------------------------------------------------------------------------
cols(
  .default = col_character(),
  SeniorCitizen = col_double(),
  tenure = col_double(),
  MonthlyCharges = col_double(),
  TotalCharges = col_double()
)
i Use `spec()` for the full column specifications.
# Open `tele` in the spreadsheet-style data viewer for a manual look.
View(tele)
library("tidyverse")
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
-- Attaching packages ---------------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
v ggplot2 3.3.3     v dplyr   1.0.4
v tibble  3.0.6     v stringr 1.4.0
v tidyr   1.1.2     v forcats 0.5.1
v purrr   0.3.4     
-- Conflicts ------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library("ggplot2")
library("magrittr") 

Attaching package: ‘magrittr’

The following object is masked from ‘package:purrr’:

    set_names

The following object is masked from ‘package:tidyr’:

    extract
library("dplyr") 
library("data.table")
data.table 1.13.6 using 2 threads (see ?getDTthreads).  Latest news: r-datatable.com

Attaching package: ‘data.table’

The following objects are masked from ‘package:dplyr’:

    between, first, last

The following object is masked from ‘package:purrr’:

    transpose
library("mlr3verse")
library("paradox")
library("mlr3tuning")
Loading required package: mlr3
library("skimr")
Registered S3 methods overwritten by 'htmltools':
  method               from         
  print.html           tools:rstudio
  print.shiny.tag      tools:rstudio
  print.shiny.tag.list tools:rstudio
# Print a per-column summary of `tele` (types, missing counts, distributions).
skim(tele)
-- Data Summary ------------------------
                           Values
Name                       tele  
Number of rows             5986  
Number of columns          20    
_______________________          
Column type frequency:           
  character                16    
  numeric                  4     
________________________         
Group variables            None  

-- Variable type: character -----------------------------------------------------------------------------------------------------------------------------------------------------------
# A tibble: 16 x 8
   skim_variable    n_missing complete_rate   min   max empty n_unique whitespace
 * <chr>                <int>         <dbl> <int> <int> <int>    <int>      <int>
 1 gender                   0             1     4     6     0        2          0
 2 Partner                  0             1     2     3     0        2          0
 3 Dependents               0             1     2     3     0        2          0
 4 PhoneService             0             1     2     3     0        2          0
 5 MultipleLines            0             1     2    16     0        3          0
 6 InternetService          0             1     2    11     0        3          0
 7 OnlineSecurity           0             1     2    19     0        3          0
 8 OnlineBackup             0             1     2    19     0        3          0
 9 DeviceProtection         0             1     2    19     0        3          0
10 TechSupport              0             1     2    19     0        3          0
11 StreamingTV              0             1     2    19     0        3          0
12 StreamingMovies          0             1     2    19     0        3          0
13 Contract                 0             1     8    14     0        3          0
14 PaperlessBilling         0             1     2     3     0        2          0
15 PaymentMethod            0             1    12    25     0        4          0
16 Churn                    0             1     2     3     0        2          0

-- Variable type: numeric -------------------------------------------------------------------------------------------------------------------------------------------------------------
# A tibble: 4 x 11
  skim_variable  n_missing complete_rate     mean       sd    p0   p25    p50    p75  p100 hist 
* <chr>              <int>         <dbl>    <dbl>    <dbl> <dbl> <dbl>  <dbl>  <dbl> <dbl> <chr>
1 SeniorCitizen          0         1        0.161    0.368   0     0      0      0      1  ▇▁▁▁▂
2 tenure                 0         1       32.5     24.5     0     9     29     56     72  ▇▃▃▃▆
3 MonthlyCharges         0         1       64.8     30.1    18.2  35.6   70.4   89.9  119. ▇▅▆▇▅
4 TotalCharges          10         0.998 2298.    2274.     18.8 404.  1412.  3847.  8685. ▇▂▂▂▁
# Data exploration: print the raw values of the MonthlyCharges column.
tele[["MonthlyCharges"]]
   [1]  24.10  88.15  74.95  55.90  53.45  49.85  90.65  24.90  35.55 116.50  68.75  51.20  99.00  54.90 109.55 106.80  74.30  25.60  94.20  46.35  25.60 107.60  19.55  96.20  69.65
  [26]  78.50  45.10  25.75 104.10 104.75  81.35  96.35  93.55  19.70  69.10  35.65  99.40  54.40  19.30  83.80  68.95  62.85 110.45  45.30  75.65  19.85  55.30  24.75  54.75  94.00
  [51] 110.30  18.80 100.80  60.00  66.05  79.75  94.80  25.45 111.15  55.25  59.50  25.35  98.55  61.20  60.90  74.55  19.65  48.75  19.15  84.70  34.25  80.70  19.45  19.70  83.20
  [76]  19.85  19.15  54.85  75.05  25.10  84.35  89.75  48.40  19.90  51.75  92.40  45.25  94.80  69.85  20.10  74.90 109.90  78.00  91.40  74.95  50.00  20.90  49.80  56.25  30.40
 [101]  19.10  59.90  20.00  79.60  19.45  74.85  73.85  20.20  71.10 109.20  62.65  59.45  45.05  60.35 114.95 114.60  50.15  86.65  65.70  19.90  19.85  59.75  45.00  71.40 101.00
 [126]  75.40 118.75  19.45  20.40  84.50  83.80  20.35 109.30  89.40  76.50  24.60 105.05  83.65 104.45 106.85 105.10  46.30  76.80  81.35  99.15  20.35  89.85  84.40  19.75  64.45
 [151]  68.95  24.40  29.70  58.75  75.40  25.00 114.30  19.65  84.25  74.60  46.00  38.85  19.65  34.60  99.10  50.15  59.45  19.50 112.20  69.95  23.85  54.30  20.20  55.90  19.15
 [176]  90.15  44.90 100.50  91.15  70.15  90.00 108.65  49.45  79.20  60.70  79.85  97.20  95.60  89.30  85.25  89.45  94.70  75.35  70.85  29.05  20.60  25.40 101.30  87.25 116.55
 [201] 107.70 102.70 115.15  76.10  60.40  64.35  19.60  49.35  24.40  20.50 110.55  99.20  84.95  94.65  39.10  84.45  19.70  19.90  18.90  19.55  85.65  54.55  19.85  86.40  34.65
 [226]  24.85  78.10 105.65  78.35  85.95  19.35  71.65  91.15  56.25  95.80  78.45  74.40  69.00 106.10  56.70 102.95  91.40 110.85  21.05  93.80 111.10  55.05  33.15  18.75  70.55
 [251]  74.45  65.15  19.85  24.80  20.05  80.55  20.35  89.60  45.05  49.25  50.55  72.25 118.65  66.50  19.45  49.80  69.85 107.20  93.35  20.00  24.50  56.05  80.85  70.50 107.50
 [276]  59.85  60.85 100.20  19.65  64.00  90.60  25.25  20.25  93.45 100.50  95.65  43.95  19.25  73.80 104.40  79.25  81.55  93.80  24.30  70.45  25.60  99.00  69.95  76.00  24.90
 [301]  64.80 103.05  20.10  19.90 110.65 109.05  19.20  66.05  45.30  84.40  45.00  99.80 106.00  20.65  71.50  83.55  98.70  49.50  48.70  19.20  82.30  21.10  69.00  19.55  75.10
 [326]  54.60  70.20  24.75  69.85  79.20  69.65  84.10  65.35  25.75 100.30  89.15  44.85  74.45  86.40  19.70  60.55  25.20  79.40  25.00  73.65  55.60 113.25  68.30 110.05  19.90
 [351]  43.95  31.35  64.85  97.20  23.45  70.20  69.55  18.95  19.80  39.65  19.95  69.10  70.05  19.80 105.75  41.90  53.10 105.10  80.70  24.65  53.15  54.65 109.70  90.60  73.00
 [376]  54.75  20.50  92.10  75.30 106.75  91.30  80.00  91.30  47.85  50.15  74.45  80.60  94.40  75.25 107.15 116.25  19.55  20.10  20.20  74.80  76.20  55.70  75.30  45.70  54.35
 [401]  94.55  77.50  98.15 111.10  58.60  94.55 116.30  89.30  20.70  19.60  99.35  49.80  54.40  54.25  35.50  84.50  59.30  20.65  55.20  25.00  24.30  70.80  78.80  83.30  61.15
 [426]  45.15  96.00  35.10  20.25  49.25  74.00  20.25 105.00 105.30  19.60  74.30  89.65  19.35  81.80  95.75  40.20  90.95 108.20  85.80  76.25  19.70  94.65 104.40 103.30  80.60
 [451]  19.30 104.30  60.75  19.65  19.85  56.15  19.95  44.80  83.55 100.80  60.30  96.25  44.60  72.75  25.30  79.70 110.10  95.10  80.50 107.40  99.30  86.60  52.20  88.35  40.05
 [476]  90.85  75.25  69.55  79.90  85.35 114.35  24.60  19.95  79.85  59.70  19.45 107.55  70.35  20.85  89.60  19.65  83.60  84.25 111.45  74.95  97.35  69.35  84.60  98.40  51.05
 [501]  95.70  94.70  69.05  64.45  19.85  19.70  95.15  72.45  89.40  20.20  70.65  73.55  19.95  20.30  64.95  19.35  20.85  76.15  99.65  53.85  92.35  96.40 114.20  81.50  45.00
 [526] 104.95  90.35 110.50 105.00  55.50  59.10  56.15
# 2/3) Base model analysis
# Benchmarks five learners with their default settings on one holdout split of
# the churn task, to give the tuned models a reference point.

# Single classification tree.
lrn_cart <- lrn("classif.rpart", predict_type = "prob")

# glmnet with alpha = 1; factor columns are encoded before reaching the learner.
lrn_glm <- lrn("classif.glmnet", predict_type = "prob", alpha = 1)
pl_glm <- po("encode") %>>% po(lrn_glm)

# Featureless learner, used as the baseline.
lrn_feat <- lrn("classif.featureless", predict_type = "prob")

# lrn_lda <- lrn("classif.lda", predict_type = "prob")
# pl_lda <- po(lrn_lda)

# Random forest, wrapped as a pipeline operator.
lrn_ranger <- lrn("classif.ranger", predict_type = "prob")
pl_ranger <- po(lrn_ranger)

# xgboost on encoded features.
lrn_xgboost <- lrn("classif.xgboost", predict_type = "prob", eval_metric = "error")
pl_xgb <- po("encode") %>>% po(lrn_xgboost)

# Convert every character column to a factor. `across(where(...))` replaces
# the superseded `mutate_if()` and its `sapply()` predicate, whose return type
# depends on the input.
tele <- tele %>% mutate(across(where(is.character), as.factor))

# tele$Churn <- factor(tele$Churn, levels = c(0, 1))

# Churn task with "Yes" as the positive class. The backend is `tele` as is,
# with no na.omit(), so rows with missing values are kept.
credit_task <- TaskClassif$new(
  id = "telee",
  backend = tele,
  target = "Churn",
  positive = "Yes"
)

# NOTE: despite its name, `cv5` is a single holdout split, not 5-fold CV.
cv5 <- rsmp("holdout")
cv5$instantiate(credit_task)

# Run every learner on the same instantiated split and keep the fitted models.
res <- benchmark(
  data.table(
    task = list(credit_task),
    learner = list(
      lrn_cart,
      pl_glm,
      lrn_feat,
      pl_ranger,
      pl_xgb
    ),
    resampling = list(cv5)
  ),
  store_models = TRUE
)
INFO  [20:57:06.281] [mlr3]  Running benchmark with 5 resampling iterations 
INFO  [20:57:07.329] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/1) 
INFO  [20:57:09.129] [mlr3]  Applying learner 'classif.featureless' on task 'telee' (iter 1/1) 
INFO  [20:57:09.149] [mlr3]  Applying learner 'encode.classif.xgboost' on task 'telee' (iter 1/1) 
INFO  [20:57:09.903] [mlr3]  Applying learner 'encode.classif.glmnet' on task 'telee' (iter 1/1) 
INFO  [20:57:11.388] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/1) 
INFO  [20:57:11.529] [mlr3]  Finished benchmark 
# Aggregate the benchmark per learner: classification error, false-positive
# rate and false-negative rate.
res$aggregate(lapply(c("classif.ce", "classif.fpr", "classif.fnr"), msr))
NA
NA
# 2/3) Default vs. tuned learners
# Runs the tree and the random forest with the hyperparameters found by the
# tuning code further down in this file, alongside their default versions.
# NOTE (original author): glmnet was not tuned by hand because the CV_GLMNET
# model tunes the regularisation parameter for us.
# NOTE(review): the original note said only xgboost was tuned, yet this cell
# compares tuned rpart and ranger settings; confirm which is intended.

# Random forest: default settings, then tuned number of trees and depth.
lrn_ranger <- lrn("classif.ranger", predict_type = "prob")
lrn_rranger <- lrn(
  "classif.ranger",
  predict_type = "prob",
  num.trees = 248,
  max.depth = 16
)
pl_ranger <- po(lrn_ranger)
pl_rranger <- po(lrn_rranger)

# Classification tree: default settings, then tuned complexity parameter.
lrn_cart <- lrn("classif.rpart", predict_type = "prob")
lrn_rcart <- lrn("classif.rpart", predict_type = "prob", cp = 0.013)

# Same churn task as before; the backend is `tele` without na.omit().
credit_task <- TaskClassif$new(
  id = "telee",
  backend = tele,
  target = "Churn",
  positive = "Yes"
)

# Fresh holdout split (the variable name is historical; this is not 5-fold CV).
cv5 <- rsmp("holdout")
cv5$instantiate(credit_task)

# Benchmark all four learners on the same split, keeping the fitted models.
res <- benchmark(
  data.table(
    task = list(credit_task),
    learner = list(lrn_cart, lrn_rcart, pl_ranger, pl_rranger),
    resampling = list(cv5)
  ),
  store_models = TRUE
)
INFO  [20:57:16.997] [mlr3]  Running benchmark with 4 resampling iterations 
INFO  [20:57:17.035] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/1) 
INFO  [20:57:18.063] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/1) 
INFO  [20:57:20.070] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/1) 
INFO  [20:57:20.144] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/1) 
INFO  [20:57:20.231] [mlr3]  Finished benchmark 
# Aggregate per learner: error and FP/FN rates, then the raw confusion counts.
res$aggregate(lapply(
  c(
    "classif.ce",
    "classif.fpr",
    "classif.fnr",
    "classif.fn",
    "classif.fp",
    "classif.tp",
    "classif.tn"
  ),
  msr
))
NA
NA

# The next 4 cells produce the ROC curves. This cell first benchmarks the
# default and tuned tree / forest learners under bootstrap resampling.
library(mlr3viz)
library(precrec)

tasks <- credit_task
learner <- list(lrn_cart, lrn_rcart, lrn_ranger, lrn_rranger)
resampling <- rsmp("bootstrap")

# TODO: try bootstrap on the rest
# TODO: check I am using all the columns in the data
# TODO: try larger grid values
# TODO: try changing the FP trade-off values
# TODO: look at the benchmarking bookmark
object <- benchmark(
  benchmark_grid(tasks = tasks, learners = learner, resamplings = resampling)
)
INFO  [20:57:25.181] [mlr3]  Running benchmark with 120 resampling iterations 
INFO  [20:57:25.216] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/30) 
INFO  [20:57:27.918] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 17/30) 
INFO  [20:57:30.789] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 19/30) 
INFO  [20:57:30.905] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 23/30) 
INFO  [20:57:32.321] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/30) 
INFO  [20:57:32.391] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 2/30) 
INFO  [20:57:33.518] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 24/30) 
INFO  [20:57:33.595] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 4/30) 
INFO  [20:57:36.487] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 22/30) 
INFO  [20:57:37.599] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 10/30) 
INFO  [20:57:40.325] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 16/30) 
INFO  [20:57:42.818] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 11/30) 
INFO  [20:57:44.031] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 23/30) 
INFO  [20:57:44.165] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 9/30) 
INFO  [20:57:45.565] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 14/30) 
INFO  [20:57:45.639] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 27/30) 
INFO  [20:57:45.713] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 25/30) 
INFO  [20:57:45.788] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 29/30) 
INFO  [20:57:46.785] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 22/30) 
INFO  [20:57:46.856] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/30) 
INFO  [20:57:46.929] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 27/30) 
INFO  [20:57:47.002] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 13/30) 
INFO  [20:57:48.462] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 10/30) 
INFO  [20:57:48.580] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 25/30) 
INFO  [20:57:50.403] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 13/30) 
INFO  [20:57:51.632] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 30/30) 
INFO  [20:57:51.745] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 11/30) 
INFO  [20:57:53.645] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 26/30) 
INFO  [20:57:53.716] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 17/30) 
INFO  [20:57:53.812] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 29/30) 
INFO  [20:57:56.193] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 5/30) 
INFO  [20:57:58.494] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 9/30) 
INFO  [20:57:58.577] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 5/30) 
INFO  [20:57:59.471] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 18/30) 
INFO  [20:57:59.542] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 4/30) 
INFO  [20:58:00.479] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 25/30) 
INFO  [20:58:01.844] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 2/30) 
INFO  [20:58:01.918] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 14/30) 
INFO  [20:58:01.985] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 24/30) 
INFO  [20:58:03.947] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 20/30) 
INFO  [20:58:05.311] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 24/30) 
INFO  [20:58:06.230] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 7/30) 
INFO  [20:58:06.301] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 27/30) 
INFO  [20:58:07.254] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 28/30) 
INFO  [20:58:09.534] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 27/30) 
INFO  [20:58:11.829] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 20/30) 
INFO  [20:58:13.661] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 11/30) 
INFO  [20:58:13.778] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 12/30) 
INFO  [20:58:13.896] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 3/30) 
INFO  [20:58:16.180] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 12/30) 
INFO  [20:58:16.251] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 3/30) 
INFO  [20:58:17.186] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 13/30) 
INFO  [20:58:17.294] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 28/30) 
INFO  [20:58:18.574] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 8/30) 
INFO  [20:58:19.460] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 29/30) 
INFO  [20:58:19.531] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 6/30) 
INFO  [20:58:20.462] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 8/30) 
INFO  [20:58:23.101] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 9/30) 
INFO  [20:58:23.179] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 18/30) 
INFO  [20:58:24.561] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 5/30) 
INFO  [20:58:24.693] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 6/30) 
INFO  [20:58:24.888] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 2/30) 
INFO  [20:58:27.103] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 6/30) 
INFO  [20:58:29.465] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 18/30) 
INFO  [20:58:31.852] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 21/30) 
INFO  [20:58:32.749] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 15/30) 
INFO  [20:58:35.073] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 6/30) 
INFO  [20:58:35.145] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/30) 
INFO  [20:58:36.105] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 15/30) 
INFO  [20:58:36.178] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 11/30) 
INFO  [20:58:36.250] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 23/30) 
INFO  [20:58:38.753] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 14/30) 
INFO  [20:58:39.649] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 19/30) 
INFO  [20:58:42.146] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 20/30) 
INFO  [20:58:42.215] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 26/30) 
INFO  [20:58:44.366] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 16/30) 
INFO  [20:58:44.477] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 15/30) 
INFO  [20:58:45.453] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 17/30) 
INFO  [20:58:45.524] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 10/30) 
INFO  [20:58:46.391] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 20/30) 
INFO  [20:58:46.462] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 25/30) 
INFO  [20:58:46.535] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 23/30) 
INFO  [20:58:46.649] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 7/30) 
INFO  [20:58:46.762] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 8/30) 
INFO  [20:58:46.907] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 30/30) 
INFO  [20:58:47.015] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 30/30) 
INFO  [20:58:48.258] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 4/30) 
INFO  [20:58:48.328] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 21/30) 
INFO  [20:58:50.296] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 10/30) 
INFO  [20:58:50.407] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 26/30) 
INFO  [20:58:51.657] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 22/30) 
INFO  [20:58:53.601] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 2/30) 
INFO  [20:58:53.710] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 22/30) 
INFO  [20:58:53.849] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 7/30) 
INFO  [20:58:55.944] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 5/30) 
INFO  [20:58:56.016] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 8/30) 
INFO  [20:58:56.087] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 28/30) 
INFO  [20:58:56.162] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 21/30) 
INFO  [20:58:56.236] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 18/30) 
INFO  [20:58:56.310] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 13/30) 
INFO  [20:58:58.781] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 26/30) 
INFO  [20:58:58.849] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 29/30) 
INFO  [20:58:58.923] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 16/30) 
INFO  [20:58:59.011] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 4/30) 
INFO  [20:58:59.082] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 14/30) 
INFO  [20:59:01.384] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 16/30) 
INFO  [20:59:02.258] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 12/30) 
INFO  [20:59:03.170] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 15/30) 
INFO  [20:59:03.280] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 3/30) 
INFO  [20:59:03.388] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 21/30) 
INFO  [20:59:03.499] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 19/30) 
INFO  [20:59:03.609] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 7/30) 
INFO  [20:59:04.889] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 9/30) 
INFO  [20:59:06.911] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 12/30) 
INFO  [20:59:09.095] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 17/30) 
INFO  [20:59:10.053] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 28/30) 
INFO  [20:59:10.162] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 19/30) 
INFO  [20:59:11.441] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 3/30) 
INFO  [20:59:11.514] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 30/30) 
INFO  [20:59:13.496] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 24/30) 
INFO  [20:59:13.640] [mlr3]  Finished benchmark 
# Show the first rows of the plotting data, then draw the default benchmark
# plot for the bootstrap comparison.
object %>% fortify() %>% head()
autoplot(object)


# ROC cell 1 of 4: default rpart tree, evaluated on the resampling held in
# `cv5`.
library(mlr3viz)
library(precrec)

tasks <- credit_task
learner <- list(lrn("classif.rpart", predict_type = "prob"))
resampling <- cv5

object <- benchmark(
  benchmark_grid(tasks = tasks, learners = learner, resamplings = resampling)
)
INFO  [20:59:22.954] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [20:59:22.997] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/1) 
INFO  [20:59:23.075] [mlr3]  Finished benchmark 
# First rows of the plotting data for the current benchmark result.
object %>% fortify() %>% head()

# Default benchmark plot, then the ROC curve. The ROC plot is drawn from a
# deep clone of the result rather than from `object` itself.
autoplot(object)
autoplot(object$clone(deep = TRUE), type = "roc")

# ROC cell 2 of 4: rpart tree with the tuned complexity parameter
# (cp = 0.013), evaluated on the resampling held in `cv5`.
library(mlr3viz)
library(precrec)

tasks <- credit_task
learner <- lrn("classif.rpart", predict_type = "prob", cp = 0.013)
resampling <- cv5

object <- benchmark(
  benchmark_grid(tasks = tasks, learners = learner, resamplings = resampling)
)
INFO  [20:59:24.706] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [20:59:24.742] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/1) 
INFO  [20:59:24.824] [mlr3]  Finished benchmark 
# First rows of the plotting data for the current benchmark result.
object %>% fortify() %>% head()

# Default benchmark plot, then the ROC curve, which is drawn from a deep clone
# of the result.
autoplot(object)
autoplot(object$clone(deep = TRUE), type = "roc")

# ROC cell 3 of 4: ranger random forest with default settings, evaluated on
# the resampling held in `cv5`.
library(mlr3viz)
library(precrec)

tasks <- credit_task
learner <- lrn("classif.ranger", predict_type = "prob")
resampling <- cv5

object <- benchmark(
  benchmark_grid(tasks = tasks, learners = learner, resamplings = resampling)
)
INFO  [20:59:26.205] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [20:59:26.241] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/1) 
INFO  [20:59:27.884] [mlr3]  Finished benchmark 
# First rows of the plotting data for the current benchmark result.
object %>% fortify() %>% head()

# Default benchmark plot, then the ROC curve, which is drawn from a deep clone
# of the result.
autoplot(object)
autoplot(object$clone(deep = TRUE), type = "roc")

# ROC cell 4 of 4: ranger random forest with the tuned settings
# (num.trees = 248, max.depth = 16), evaluated on the resampling held in `cv5`.
library(mlr3viz)
library(precrec)

tasks <- credit_task
learner <- lrn(
  "classif.ranger",
  predict_type = "prob",
  num.trees = 248,
  max.depth = 16
)
resampling <- cv5

object <- benchmark(
  benchmark_grid(tasks = tasks, learners = learner, resamplings = resampling)
)
INFO  [20:59:29.527] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [20:59:29.562] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/1) 
INFO  [20:59:30.363] [mlr3]  Finished benchmark 
# First rows of the plotting data for the current benchmark result.
object %>% fortify() %>% head()

# Default benchmark plot, then the ROC curve, which is drawn from a deep clone
# of the result.
autoplot(object)
autoplot(object$clone(deep = TRUE), type = "roc")

# Plot of the cost (complexity) penalty for the tree.
# The tree is fitted with xval = 10 and resampled with bootstrap; the fitted
# models are stored so one of them can be passed to the cp plot afterwards.
# NOTE: this rebinds `cv5` from the earlier holdout split to a bootstrap
# resampling.
lrn_cart_cv <- lrn("classif.rpart", predict_type = "prob", xval = 10)
cv5 <- rsmp("bootstrap")
res_cart_cv <- resample(
  task = credit_task,
  learner = lrn_cart_cv,
  resampling = cv5,
  store_models = TRUE
)
INFO  [20:59:31.944] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 11/30) 
INFO  [20:59:32.237] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 26/30) 
INFO  [20:59:32.625] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 20/30) 
INFO  [20:59:33.055] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 7/30) 
INFO  [20:59:33.453] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 19/30) 
INFO  [20:59:33.698] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 6/30) 
INFO  [20:59:33.945] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 8/30) 
INFO  [20:59:34.217] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 15/30) 
INFO  [20:59:34.473] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 13/30) 
INFO  [20:59:34.810] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 30/30) 
INFO  [20:59:35.194] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 14/30) 
INFO  [20:59:35.548] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 27/30) 
INFO  [20:59:35.919] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 12/30) 
INFO  [20:59:36.300] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 21/30) 
INFO  [20:59:36.656] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 24/30) 
INFO  [20:59:36.943] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 16/30) 
INFO  [20:59:37.194] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 25/30) 
INFO  [20:59:37.443] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 22/30) 
INFO  [20:59:37.691] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 17/30) 
INFO  [20:59:37.945] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 18/30) 
INFO  [20:59:38.346] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 28/30) 
INFO  [20:59:38.723] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 4/30) 
INFO  [20:59:39.078] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 29/30) 
INFO  [20:59:39.447] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 10/30) 
INFO  [20:59:39.818] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 9/30) 
INFO  [20:59:40.172] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/30) 
INFO  [20:59:40.433] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 3/30) 
INFO  [20:59:40.682] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 23/30) 
INFO  [20:59:40.936] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 5/30) 
INFO  [20:59:41.184] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 2/30) 
# Draw the complexity-parameter plot for the tree fitted in resampling
# iteration 10.
# NOTE(review): the index 10 looks like an arbitrary pick among the stored
# models; confirm.
rpart::plotcp(res_cart_cv$learners[[10]]$model)

# Print the ranger learner's hyperparameter set.
lrn_ranger$param_set
<ParamSet>

# Tuning the number of trees and the maximum depth of the random forest.
learner <- lrn("classif.ranger", predict_type = "prob")

# Integer search ranges for the two tuned hyperparameters.
search_space <- ps(
  num.trees = p_int(lower = 200, upper = 500),
  max.depth = p_int(lower = 2, upper = 30)
)

# Each configuration is scored by accuracy on a holdout split.
measure <- msr("classif.acc")
hout <- rsmp("holdout")

# NOTE: despite the name `evals20`, the budget is 10 evaluations.
evals20 <- trm("evals", n_evals = 10)

# Unlike the benchmark task, this task is built on na.omit(tele), so rows with
# missing values are dropped.
task <- TaskClassif$new(
  id = "telee",
  backend = na.omit(tele),
  target = "Churn",
  positive = "Yes"
)

instance <- TuningInstanceSingleCrit$new(
  task = task,
  learner = learner,
  resampling = hout,
  measure = measure,
  search_space = search_space,
  terminator = evals20
)
# Print the tuning instance to check its configuration before optimising.
instance
<TuningInstanceSingleCrit>
* State:  Not optimized
* Objective: <ObjectiveTuning:classif.ranger_on_telee>
* Search Space:
<ParamSet>
* Terminator: <TerminatorEvals>
* Terminated: FALSE
* Archive:
<ArchiveTuning>
# Grid search with resolution 250; the run ends when the terminator attached
# to `instance` is reached.
tuner <- tnr("grid_search", resolution = 250)
tuner$optimize(instance)
INFO  [20:59:44.194] [bbotk] Starting to optimize 2 parameter(s) with '<OptimizerGridSearch>' and '<TerminatorEvals> [n_evals=10]' 
INFO  [20:59:44.240] [bbotk] Evaluating 1 configuration(s) 
INFO  [20:59:44.387] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [20:59:44.425] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/1) 
INFO  [20:59:45.423] [mlr3]  Finished benchmark 
INFO  [20:59:45.552] [bbotk] Result of batch 1: 
INFO  [20:59:45.618] [bbotk]  
INFO  [20:59:45.652] [bbotk] Evaluating 1 configuration(s) 
INFO  [20:59:45.749] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [20:59:45.784] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/1) 
INFO  [20:59:46.915] [mlr3]  Finished benchmark 
INFO  [20:59:47.075] [bbotk] Result of batch 2: 
INFO  [20:59:47.125] [bbotk]  
INFO  [20:59:47.150] [bbotk] Evaluating 1 configuration(s) 
INFO  [20:59:47.246] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [20:59:47.286] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/1) 
INFO  [20:59:48.300] [mlr3]  Finished benchmark 
INFO  [20:59:48.446] [bbotk] Result of batch 3: 
INFO  [20:59:48.509] [bbotk]  
INFO  [20:59:48.535] [bbotk] Evaluating 1 configuration(s) 
INFO  [20:59:48.629] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [20:59:48.677] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/1) 
INFO  [20:59:50.205] [mlr3]  Finished benchmark 
INFO  [20:59:50.374] [bbotk] Result of batch 4: 
INFO  [20:59:50.421] [bbotk]  
INFO  [20:59:50.449] [bbotk] Evaluating 1 configuration(s) 
INFO  [20:59:50.562] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [20:59:50.601] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/1) 
INFO  [20:59:51.991] [mlr3]  Finished benchmark 
INFO  [20:59:52.143] [bbotk] Result of batch 5: 
INFO  [20:59:52.188] [bbotk]  
INFO  [20:59:52.216] [bbotk] Evaluating 1 configuration(s) 
INFO  [20:59:52.313] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [20:59:52.352] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/1) 
INFO  [20:59:53.145] [mlr3]  Finished benchmark 
INFO  [20:59:53.321] [bbotk] Result of batch 6: 
INFO  [20:59:53.373] [bbotk]  
INFO  [20:59:53.401] [bbotk] Evaluating 1 configuration(s) 
INFO  [20:59:53.512] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [20:59:53.547] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/1) 
INFO  [20:59:54.879] [mlr3]  Finished benchmark 
INFO  [20:59:55.066] [bbotk] Result of batch 7: 
INFO  [20:59:55.117] [bbotk]  
INFO  [20:59:55.144] [bbotk] Evaluating 1 configuration(s) 
INFO  [20:59:55.250] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [20:59:55.289] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/1) 
INFO  [20:59:56.786] [mlr3]  Finished benchmark 
INFO  [20:59:56.936] [bbotk] Result of batch 8: 
INFO  [20:59:56.984] [bbotk]  
INFO  [20:59:57.010] [bbotk] Evaluating 1 configuration(s) 
INFO  [20:59:57.110] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [20:59:57.151] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/1) 
INFO  [20:59:57.766] [mlr3]  Finished benchmark 
INFO  [20:59:57.935] [bbotk] Result of batch 9: 
INFO  [20:59:57.986] [bbotk]  
INFO  [20:59:58.012] [bbotk] Evaluating 1 configuration(s) 
INFO  [20:59:58.139] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [20:59:58.173] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/1) 
INFO  [20:59:59.770] [mlr3]  Finished benchmark 
INFO  [20:59:59.942] [bbotk] Result of batch 10: 
INFO  [20:59:59.990] [bbotk]  
INFO  [21:00:00.040] [bbotk] Finished optimizing after 10 evaluation(s) 
INFO  [21:00:00.065] [bbotk] Result: 
INFO  [21:00:00.128] [bbotk]  
# Print the rpart learner's hyperparameter set.
lrn_cart$param_set
<ParamSet>

# Tuning the classification tree's complexity parameter (cp), i.e. the cost-complexity penalty

learner <- lrn("classif.rpart", predict_type = "prob")



search_space = ps(
  cp = p_dbl(lower = 0.0001, upper = 0.1)
)

#tele = tele %>% mutate_if(sapply(tele, is.character), as.factor)

hout <- rsmp("holdout")
measure = msr("classif.acc")


evals20 = trm("evals", n_evals = 10)

task<-TaskClassif$new(id = "telee",
                               backend = tele, # <- NB: no na.omit() this time
                               target = "Churn",
                               positive = "Yes")



instance = TuningInstanceSingleCrit$new(
  task = task,
  learner = learner,
  resampling = hout,
  measure = measure,
  search_space = search_space,
  terminator = evals20
)
instance
<TuningInstanceSingleCrit>
* State:  Not optimized
* Objective: <ObjectiveTuning:classif.rpart_on_telee>
* Search Space:
<ParamSet>
* Terminator: <TerminatorEvals>
* Terminated: FALSE
* Archive:
<ArchiveTuning>
tuner = tnr("grid_search", resolution = 250)
tuner$optimize(instance)
INFO  [21:00:01.557] [bbotk] Starting to optimize 1 parameter(s) with '<OptimizerGridSearch>' and '<TerminatorEvals> [n_evals=10]' 
INFO  [21:00:01.583] [bbotk] Evaluating 1 configuration(s) 
INFO  [21:00:01.655] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:01.692] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/1) 
INFO  [21:00:01.763] [mlr3]  Finished benchmark 
INFO  [21:00:01.892] [bbotk] Result of batch 1: 
INFO  [21:00:01.949] [bbotk]  
INFO  [21:00:01.973] [bbotk] Evaluating 1 configuration(s) 
INFO  [21:00:02.058] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:02.098] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/1) 
INFO  [21:00:02.168] [mlr3]  Finished benchmark 
INFO  [21:00:02.286] [bbotk] Result of batch 2: 
INFO  [21:00:02.328] [bbotk]  
INFO  [21:00:02.351] [bbotk] Evaluating 1 configuration(s) 
INFO  [21:00:02.418] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:02.454] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/1) 
INFO  [21:00:02.531] [mlr3]  Finished benchmark 
INFO  [21:00:02.643] [bbotk] Result of batch 3: 
INFO  [21:00:02.689] [bbotk]  
INFO  [21:00:02.711] [bbotk] Evaluating 1 configuration(s) 
INFO  [21:00:02.779] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:02.815] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/1) 
INFO  [21:00:02.900] [mlr3]  Finished benchmark 
INFO  [21:00:03.016] [bbotk] Result of batch 4: 
INFO  [21:00:03.068] [bbotk]  
INFO  [21:00:03.094] [bbotk] Evaluating 1 configuration(s) 
INFO  [21:00:03.162] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:03.196] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/1) 
INFO  [21:00:03.307] [mlr3]  Finished benchmark 
INFO  [21:00:03.446] [bbotk] Result of batch 5: 
INFO  [21:00:03.494] [bbotk]  
INFO  [21:00:03.519] [bbotk] Evaluating 1 configuration(s) 
INFO  [21:00:03.585] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:03.624] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/1) 
INFO  [21:00:03.709] [mlr3]  Finished benchmark 
INFO  [21:00:03.823] [bbotk] Result of batch 6: 
INFO  [21:00:03.871] [bbotk]  
INFO  [21:00:03.900] [bbotk] Evaluating 1 configuration(s) 
INFO  [21:00:03.972] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:04.008] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/1) 
INFO  [21:00:04.093] [mlr3]  Finished benchmark 
INFO  [21:00:04.215] [bbotk] Result of batch 7: 
INFO  [21:00:04.263] [bbotk]  
INFO  [21:00:04.292] [bbotk] Evaluating 1 configuration(s) 
INFO  [21:00:04.364] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:04.402] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/1) 
INFO  [21:00:04.490] [mlr3]  Finished benchmark 
INFO  [21:00:04.606] [bbotk] Result of batch 8: 
INFO  [21:00:04.651] [bbotk]  
INFO  [21:00:04.676] [bbotk] Evaluating 1 configuration(s) 
INFO  [21:00:04.744] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:04.778] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/1) 
INFO  [21:00:04.897] [mlr3]  Finished benchmark 
INFO  [21:00:05.032] [bbotk] Result of batch 9: 
INFO  [21:00:05.078] [bbotk]  
INFO  [21:00:05.103] [bbotk] Evaluating 1 configuration(s) 
INFO  [21:00:05.230] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:05.270] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/1) 
INFO  [21:00:05.361] [mlr3]  Finished benchmark 
INFO  [21:00:05.485] [bbotk] Result of batch 10: 
INFO  [21:00:05.530] [bbotk]  
INFO  [21:00:05.566] [bbotk] Finished optimizing after 10 evaluation(s) 
INFO  [21:00:05.590] [bbotk] Result: 
INFO  [21:00:05.635] [bbotk]  


# Optimising the classification threshold to minimise the false negative rate (the tuned measure is classif.fnr)



gr = lrn("classif.rpart", predict_type = "prob") %>>% po("threshold")
learner = GraphLearner$new(gr)


search_space = ps(
  threshold.thresholds = p_dbl(lower = 0.36, upper = 0.64)
)


terminator = trm("evals", n_evals = 10)
tuner = tnr("grid_search")

at = AutoTuner$new(
  learner = learner,
  resampling = rsmp("holdout"),
  measure = msr("classif.fnr"),
  search_space = search_space,
  terminator = terminator,
  tuner = tuner
)
at
<AutoTuner:classif.rpart.threshold.tuned>
* Model: -
* Parameters: list()
* Packages: -
* Predict Type: response
* Feature types: logical, integer, numeric, character, factor, ordered, POSIXct
* Properties: featureless, importance, missings, multiclass, oob_error, selected_features, twoclass, weights
grid = benchmark_grid(
  task = task,
  learner = list(at, lrn("classif.rpart")),
  resampling = rsmp("cv", folds = 3)
)

# avoid console output from mlr3tuning
logger = lgr::get_logger("bbotk")
logger$set_threshold("warn")

bmr = benchmark(grid)
INFO  [21:00:07.140] [mlr3]  Running benchmark with 6 resampling iterations 
INFO  [21:00:07.177] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/3) 
INFO  [21:00:07.252] [mlr3]  Applying learner 'classif.rpart.threshold.tuned' on task 'telee' (iter 3/3) 
INFO  [21:00:07.583] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:07.603] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:07.843] [mlr3]  Finished benchmark 
INFO  [21:00:08.134] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:08.154] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:08.365] [mlr3]  Finished benchmark 
INFO  [21:00:08.608] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:08.626] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:08.832] [mlr3]  Finished benchmark 
INFO  [21:00:09.094] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:09.114] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:09.327] [mlr3]  Finished benchmark 
INFO  [21:00:09.577] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:09.617] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:09.847] [mlr3]  Finished benchmark 
INFO  [21:00:10.227] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:10.242] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:10.497] [mlr3]  Finished benchmark 
INFO  [21:00:10.939] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:10.966] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:11.304] [mlr3]  Finished benchmark 
INFO  [21:00:11.671] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:11.696] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:12.074] [mlr3]  Finished benchmark 
INFO  [21:00:12.434] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:12.455] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:12.752] [mlr3]  Finished benchmark 
INFO  [21:00:13.119] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:13.135] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:13.327] [mlr3]  Finished benchmark 
INFO  [21:00:13.882] [mlr3]  Applying learner 'classif.rpart.threshold.tuned' on task 'telee' (iter 1/3) 
INFO  [21:00:14.345] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:14.367] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:14.667] [mlr3]  Finished benchmark 
INFO  [21:00:15.148] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:15.173] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:15.474] [mlr3]  Finished benchmark 
INFO  [21:00:15.854] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:15.875] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:16.207] [mlr3]  Finished benchmark 
INFO  [21:00:16.496] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:16.511] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:16.708] [mlr3]  Finished benchmark 
INFO  [21:00:16.951] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:16.978] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:17.277] [mlr3]  Finished benchmark 
INFO  [21:00:17.638] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:17.666] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:17.976] [mlr3]  Finished benchmark 
INFO  [21:00:18.381] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:18.404] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:18.703] [mlr3]  Finished benchmark 
INFO  [21:00:19.055] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:19.082] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:19.391] [mlr3]  Finished benchmark 
INFO  [21:00:19.737] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:19.752] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:19.942] [mlr3]  Finished benchmark 
INFO  [21:00:20.220] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:20.248] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:20.515] [mlr3]  Finished benchmark 
INFO  [21:00:21.251] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 2/3) 
INFO  [21:00:21.344] [mlr3]  Applying learner 'classif.rpart.threshold.tuned' on task 'telee' (iter 2/3) 
INFO  [21:00:21.743] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:21.775] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:22.080] [mlr3]  Finished benchmark 
INFO  [21:00:22.482] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:22.506] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:22.795] [mlr3]  Finished benchmark 
INFO  [21:00:23.098] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:23.112] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:23.309] [mlr3]  Finished benchmark 
INFO  [21:00:23.535] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:23.550] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:23.808] [mlr3]  Finished benchmark 
INFO  [21:00:24.160] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:24.184] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:24.517] [mlr3]  Finished benchmark 
INFO  [21:00:24.873] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:24.892] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:25.181] [mlr3]  Finished benchmark 
INFO  [21:00:25.564] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:25.587] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:25.888] [mlr3]  Finished benchmark 
INFO  [21:00:26.254] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:26.279] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:26.552] [mlr3]  Finished benchmark 
INFO  [21:00:26.778] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:26.794] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:27.004] [mlr3]  Finished benchmark 
INFO  [21:00:27.358] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:27.376] [mlr3]  Applying learner 'classif.rpart.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:27.661] [mlr3]  Finished benchmark 
INFO  [21:00:28.454] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 3/3) 
INFO  [21:00:28.585] [mlr3]  Finished benchmark 
bmr$aggregate(msrs(c("classif.ce",
                   "classif.fpr",
                   "classif.fnr",
                   "classif.fn",
                   "classif.fp",
                   "classif.tp",
                   "classif.tn")))
NA
NA
NA
NA
# Optimising the classification threshold for the false negative rate
# (the AutoTuner defined further down tunes msr("classif.fnr")).
# NOTE(review): this heading originally said "false positive rate" - confirm which was intended.


task<-TaskClassif$new(id = "telee",
                               backend = na.omit(tele), # rows containing any NA are dropped before the task is built
                               target = "Churn",
                               positive = "Yes")

gr = lrn("classif.ranger", predict_type = "prob") %>>% po("threshold")

learner = GraphLearner$new(gr) 


search_space = ps(
  threshold.thresholds = p_dbl(lower = 0.35, upper = 0.65)
)


terminator = trm("evals", n_evals = 10)
tuner = tnr("grid_search")

at = AutoTuner$new(
  learner = learner,
  resampling = rsmp("holdout"),
  measure = msr("classif.fnr"),
  search_space = search_space,
  terminator = terminator,
  tuner = tuner
)
at
<AutoTuner:classif.ranger.threshold.tuned>
* Model: -
* Parameters: list()
* Packages: -
* Predict Type: response
* Feature types: logical, integer, numeric, character, factor, ordered, POSIXct
* Properties: featureless, importance, missings, multiclass, oob_error, selected_features, twoclass, weights
grid = benchmark_grid(
  task = task,
  learner = list(at, lrn("classif.ranger")),
  resampling = rsmp("holdout")
)

# avoid console output from mlr3tuning
logger = lgr::get_logger("bbotk")
logger$set_threshold("warn")

bmr = benchmark(grid)
INFO  [21:00:31.593] [mlr3]  Running benchmark with 2 resampling iterations 
INFO  [21:00:31.630] [mlr3]  Applying learner 'classif.ranger' on task 'telee' (iter 1/1) 
INFO  [21:00:33.144] [mlr3]  Applying learner 'classif.ranger.threshold.tuned' on task 'telee' (iter 1/1) 
INFO  [21:00:33.950] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:33.968] [mlr3]  Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:35.351] [mlr3]  Finished benchmark 
INFO  [21:00:36.052] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:36.083] [mlr3]  Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:37.950] [mlr3]  Finished benchmark 
INFO  [21:00:38.675] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:38.691] [mlr3]  Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:40.171] [mlr3]  Finished benchmark 
INFO  [21:00:40.781] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:40.806] [mlr3]  Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:42.305] [mlr3]  Finished benchmark 
INFO  [21:00:42.817] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:42.844] [mlr3]  Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:44.464] [mlr3]  Finished benchmark 
INFO  [21:00:45.047] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:45.074] [mlr3]  Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:46.449] [mlr3]  Finished benchmark 
INFO  [21:00:47.139] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:47.160] [mlr3]  Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:48.892] [mlr3]  Finished benchmark 
INFO  [21:00:49.319] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:49.346] [mlr3]  Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:51.816] [mlr3]  Finished benchmark 
INFO  [21:00:52.726] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:52.742] [mlr3]  Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:54.385] [mlr3]  Finished benchmark 
INFO  [21:00:55.094] [mlr3]  Running benchmark with 1 resampling iterations 
INFO  [21:00:55.120] [mlr3]  Applying learner 'classif.ranger.threshold' on task 'telee' (iter 1/1) 
INFO  [21:00:56.581] [mlr3]  Finished benchmark 
INFO  [21:00:59.824] [mlr3]  Finished benchmark 
bmr$aggregate(msrs(c("classif.ce",
                   "classif.fpr",
                   "classif.fnr",
                   "classif.fn",
                   "classif.fp",
                   "classif.tp",
                   "classif.tn")))
NA
NA
NA

lrn_cart <- lrn("classif.rpart", predict_type = "prob")


credit_task <- TaskClassif$new(id = "telee",
                               backend = tele, # <- NB: no na.omit() this time
                               target = "Churn",               positive = "Yes")


cv5 <- rsmp("cv", folds = 10)
cv5$instantiate(credit_task)
# Now fit as normal ... we can just add it to our benchmark set

res <- benchmark(data.table(
  task       = list(credit_task),
  learner    = list(lrn_cart),
  resampling = list(cv5)
), store_models = TRUE)
INFO  [21:01:01.408] [mlr3]  Running benchmark with 10 resampling iterations 
INFO  [21:01:01.448] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 7/10) 
INFO  [21:01:01.589] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 2/10) 
INFO  [21:01:01.663] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 10/10) 
INFO  [21:01:01.736] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 3/10) 
INFO  [21:01:01.811] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 6/10) 
INFO  [21:01:01.894] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 4/10) 
INFO  [21:01:01.966] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 5/10) 
INFO  [21:01:02.046] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 8/10) 
INFO  [21:01:02.118] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 9/10) 
INFO  [21:01:02.191] [mlr3]  Applying learner 'classif.rpart' on task 'telee' (iter 1/10) 
INFO  [21:01:02.280] [mlr3]  Finished benchmark 
res$aggregate(list(msr("classif.ce")))



trees <- res$resample_result(1)

# Then, let's look at the tree from first CV iteration, for example:
tree1 <- trees$learners[[1]]

# This is a fitted rpart object, so we can look at the model within
tree1_rpart <- tree1$model

# If you look in the rpart package documentation, it tells us how to plot the
# tree that was fitted

plot(tree1_rpart, compress = TRUE)

text(tree1_rpart, use.n = TRUE)

library(mlr)
Loading required package: ParamHelpers
Warning message: 'mlr' is in 'maintenance-only' mode since July 2019. Future development will only happen in 'mlr3' (<https://mlr3.mlr-org.com>). Due to the focus
on 'mlr3' there might be uncaught bugs meanwhile in {mlr} - please consider switching.

Attaching package: 㤼㸱mlr㤼㸲

The following objects are masked _by_ 㤼㸱.GlobalEnv㤼㸲:

    cv5, hout

The following objects are masked from 㤼㸱package:mlr3㤼㸲:

    benchmark, resample

The following objects are masked from 㤼㸱package:mlr3verse㤼㸲:

    benchmark, resample
library(randomForest)
randomForest 4.6-14
Type rfNews() to see new features/changes/bug fixes.

Attaching package: 㤼㸱randomForest㤼㸲

The following object is masked from 㤼㸱package:gridExtra㤼㸲:

    combine

The following object is masked from 㤼㸱package:dplyr㤼㸲:

    combine

The following object is masked from 㤼㸱package:ggplot2㤼㸲:

    margin
lrns = list(
    makeLearner('classif.rpart', predict.type="prob"),
  "classif.randomForest"
)
regr.task = makeClassifTask(id = "telee", data = na.omit(tele), target = "Churn")
Provided data is not a pure data.frame but from class tbl_df, hence it will be converted.
rin = makeResampleDesc(method = "Holdout")
lc = generateLearningCurveData(learners = lrns, task = regr.task,
  percs = seq(0.1, 1, by = 0.1), measures = acc,
  resampling = rin, show.info = FALSE)
plotLearningCurve(lc)

lrns = list(
    makeLearner('classif.rpart', predict.type="prob"),
  makeLearner("classif.randomForest",ntree = 2,mtry = 1, nodesize=2)
)

rin2 = makeResampleDesc(method = "Holdout", predict = "both")
lc2 = generateLearningCurveData(learners = lrns, task = regr.task,
  percs = seq(0.1, 1, by = 0.1),
  measures = list(acc, setAggregation(acc, train.mean)), resampling = rin2,
  show.info = FALSE)
plotLearningCurve(lc2, facet = "learner")

---
title: "R Notebook"
output:
  html_document:
    df_print: paged
  html_notebook: default
  pdf_document: default
---
```{r}


library(readr)
# Load the telecom churn data.
# NOTE(review): this is an absolute, machine-specific path, so the notebook only
# runs where that folder exists - consider a project-relative path.
tele <- read_csv("C:\\Users\\Rober\\OneDrive\\Documents\\Uni Stuff\\NOTEBOOKS\\Class\\telecom.csv")

# View() opens an interactive data viewer, so skip it when the notebook is
# rendered non-interactively (e.g. knitted to HTML/PDF).
if (interactive()) {
  View(tele)
}
```
```{r}
library("tidyverse")
library("ggplot2")
library("magrittr") 
library("dplyr") 
library("data.table")
library("mlr3verse")
library("paradox")
library("mlr3tuning")
```

```{R}
library("skimr")
skim(tele)
#data exploration
```
```{r}
# Data manipulation
# NOTE(review): plyr is attached here, after dplyr was attached in an earlier
# chunk, so plyr's same-named functions (e.g. mutate, summarise) mask dplyr's
# from this point on - confirm that is intended.
library("plyr")
library("FSA")
library("corrplot")
library("gridExtra")
library("GGally")
# Keep only the rows that have no missing value in any column
tele <- tele[complete.cases(tele), ]

# Map customer tenure (in months) to a coarse tenure-band label.
#
# Args:
#   tenure: numeric vector of tenures (a single value works too).
#
# Returns:
#   A character vector the same length as `tenure` holding one of
#   "0-12 Month", "12-24 Month", "24-48 Month", "48-60 Month" or "> 60 Month".
#   Missing or negative tenures give NA instead of NULL, so the result is
#   always a plain character vector.
group_tenure <- function(tenure) {
  # Bands are closed on the right: [0, 12], (12, 24], (24, 48], (48, 60], (60, Inf]
  bands <- cut(
    tenure,
    breaks = c(0, 12, 24, 48, 60, Inf),
    labels = c(
      "0-12 Month",
      "12-24 Month",
      "24-48 Month",
      "48-60 Month",
      "> 60 Month"
    ),
    right = TRUE,
    include.lowest = TRUE
  )
  as.character(bands)
}
# Label every customer with a tenure band. vapply() (instead of sapply())
# guarantees a character vector, even for zero rows, and fails loudly if
# group_tenure() ever returns anything other than a single string.
tele$tenure_group <- vapply(tele$tenure, group_tenure, character(1))
tele$tenure_group <- as.factor(tele$tenure_group)

# Drop the raw tenure and TotalCharges columns from the data
tele$tenure <- NULL
tele$TotalCharges <- NULL

#view(tele)

# Horizontal bar chart showing the percentage of rows in each level of one column.
#
# Args:
#   data:   data frame to plot.
#   column: name of the column to count, as a string.
#   label:  text used for both the plot title and the axis label.
#
# Returns:
#   A ggplot object.
plot_pct_bar <- function(data, column, label) {
  ggplot(data, aes(x = .data[[column]])) +
    ggtitle(label) +
    xlab(label) +
    # after_stat() replaces the legacy ..count.. notation
    geom_bar(
      aes(y = 100 * after_stat(count) / sum(after_stat(count))),
      width = 0.5
    ) +
    ylab("Percentage") +
    coord_flip() +
    theme_minimal()
}

p7 <- plot_pct_bar(tele, "InternetService", "Internet Service")
p8 <- plot_pct_bar(tele, "OnlineSecurity", "Online Security")
grid.arrange(p7, p8, ncol = 2)

p13 <- plot_pct_bar(tele, "StreamingMovies", "Streaming Movies")
p14 <- plot_pct_bar(tele, "Contract", "Contract")
p15 <- plot_pct_bar(tele, "PaperlessBilling", "Paperless Billing")
p16 <- plot_pct_bar(tele, "PaymentMethod", "Payment Method")
p17 <- plot_pct_bar(tele, "tenure_group", "Tenure Group")
grid.arrange(p13, p14, p15, p16, p17, ncol = 2)

# Pairwise plots of StreamingTV, Partner and Churn, coloured by Churn
ggpairs(
  data = select(tele, StreamingTV, Partner, Churn),
  mapping = aes(colour = Churn)
)

# Print the MonthlyCharges column
tele$MonthlyCharges


```









```{R}
# 2/3)
# Base model analysis: define the untuned learners that are benchmarked below.
# Every learner predicts class probabilities (predict_type = "prob").

# Classification tree (rpart)
lrn_cart <- lrn("classif.rpart", predict_type = "prob")

# glmnet with alpha = 1, preceded by an encoding step for the factor columns
lrn_glm <- lrn("classif.glmnet", predict_type = "prob", alpha = 1)
pl_glm <- po("encode") %>>% po(lrn_glm)

# Featureless learner, used as the baseline to beat
lrn_feat <- lrn("classif.featureless", predict_type = "prob")

# LDA is currently disabled
#lrn_lda <- lrn("classif.lda", predict_type = "prob")
#pl_lda <-  po(lrn_lda)



# Random forest (ranger), wrapped as a pipeline operator
lrn_ranger <- lrn("classif.ranger", predict_type = "prob")
pl_ranger<-  po(lrn_ranger)

# xgboost with eval_metric = "error", preceded by the same encoding step as glmnet
lrn_xgboost <- lrn("classif.xgboost", predict_type = "prob", eval_metric= "error")
pl_xgb <- po("encode") %>>% po(lrn_xgboost)

# Convert every character column to a factor.
# across(where(...)) replaces the superseded mutate_if(); dplyr:: is spelled out
# because plyr is attached after dplyr in this notebook and masks mutate().
tele <- tele %>%
  dplyr::mutate(dplyr::across(where(is.character), as.factor))

#tele$Churn <- factor(tele$Churn, levels=c(0, 1))

# Classification task: predict Churn, with "Yes" as the positive class
credit_task <- TaskClassif$new(id = "telee",
                               backend = tele, # no na.omit() here: incomplete rows were already removed with complete.cases() in the data-manipulation chunk
                               target = "Churn",
                               positive = "Yes")




# NOTE: despite its name, cv5 is a single holdout split here, not 5-fold CV.
# Instantiating it fixes the split so all learners share the same train/test rows.
cv5 <- rsmp("holdout")
cv5$instantiate(credit_task)
# Now fit as normal ... we can just add it to our benchmark set

# One benchmark row per learner, all on the same task and resampling;
# store_models = TRUE keeps the fitted models in the result.
res <- benchmark(data.table(
  task       = list(credit_task),
  learner    = list(lrn_cart,pl_glm,
                    lrn_feat,
                    pl_ranger,
                    pl_xgb),
  resampling = list(cv5)
), store_models = TRUE)


# Classification error, false positive rate and false negative rate per learner
res$aggregate(list(msr("classif.ce"),
                   msr("classif.fpr"),
                   msr("classif.fnr")))


```

```{R}
#2/3)
#Tested the params I found through tuning (see tuning code below this cell)
#Note only tuned xgboost as GLMNET has a model called CV_GLMNET which tunes the regularisation param for us


# Untuned baselines: classification tree and random forest with default settings
lrn_cart <- lrn("classif.rpart", predict_type = "prob")
lrn_ranger <- lrn("classif.ranger", predict_type = "prob")

# Tuned variants, using the hyperparameter values found by the tuning chunks
lrn_rcart <- lrn("classif.rpart", predict_type = "prob", cp = 0.013)
lrn_rranger <- lrn(
  "classif.ranger",
  predict_type = "prob",
  num.trees = 248,
  max.depth = 16
)

# Wrap both forests as pipeline operators for the benchmark below
pl_ranger <- po(lrn_ranger)
pl_rranger <- po(lrn_rranger)



# Classification task: predict Churn, with "Yes" as the positive class
credit_task <- TaskClassif$new(
  id = "telee",
  backend = tele,
  target = "Churn",
  positive = "Yes"
)

# NOTE: despite its name, cv5 is a single holdout split; instantiating it fixes
# the split so every learner is evaluated on the same train/test rows.
cv5 <- rsmp("holdout")
cv5$instantiate(credit_task)

# Compare the default and tuned tree and forest on that shared split
res <- benchmark(
  data.table(
    task = list(credit_task),
    learner = list(lrn_cart, lrn_rcart, pl_ranger, pl_rranger),
    resampling = list(cv5)
  ),
  store_models = TRUE
)

# Error and rates, followed by the raw confusion-matrix counts
res$aggregate(msrs(c(
  "classif.ce",
  "classif.fpr",
  "classif.fnr",
  "classif.fn",
  "classif.fp",
  "classif.tp",
  "classif.tn"
)))


```

```{R}

# Benchmark all four learners (default/tuned tree and forest) with bootstrap resampling.
# No ROC curve is drawn in this cell; the ROC curves are in the cells that follow.
library(mlr3viz)
library(precrec)
tasks = credit_task

learner    = list(lrn_cart,lrn_rcart, lrn_ranger,lrn_rranger)
resampling = rsmp("bootstrap")
# TODO: try bootstrap on the rest
# TODO: check I am using all the columns in the data
# TODO: try larger grid values
# TODO: try changing the FP trade-off values
# TODO: look at the benchmarking bookmark
object = benchmark(benchmark_grid(tasks, learner, resampling))

# Peek at the data frame that mlr3viz builds from the benchmark result
head(fortify(object))

# Default autoplot of the benchmark result
autoplot(object)
```




```{R}

#Next 4 cells are the ROC curves
library(mlr3viz)
library(precrec)
# ROC analysis for the default classification tree, resampled with cv5
tasks <- credit_task
resampling <- cv5
learner <- list(lrn("classif.rpart", predict_type = "prob"))

object <- benchmark(
  benchmark_grid(tasks = tasks, learners = learner, resamplings = resampling)
)

head(fortify(object))
autoplot(object)

# Draw the ROC curve from a deep copy of the benchmark result
autoplot(object$clone(deep = TRUE), type = "roc")
```
```{R}
library(mlr3viz)
library(precrec)
# ROC analysis for the tuned classification tree (cp = 0.013), resampled with cv5
tasks <- credit_task
resampling <- cv5
learner <- lrn("classif.rpart", predict_type = "prob", cp = 0.013)

object <- benchmark(
  benchmark_grid(tasks = tasks, learners = learner, resamplings = resampling)
)

head(fortify(object))
autoplot(object)

# Draw the ROC curve from a deep copy of the benchmark result
autoplot(object$clone(deep = TRUE), type = "roc")
```


```{R}
library(mlr3viz)
library(precrec)
# ROC analysis for the default random forest, resampled with cv5
tasks <- credit_task
resampling <- cv5
learner <- lrn("classif.ranger", predict_type = "prob")

object <- benchmark(
  benchmark_grid(tasks = tasks, learners = learner, resamplings = resampling)
)

head(fortify(object))
autoplot(object)

# Draw the ROC curve from a deep copy of the benchmark result
autoplot(object$clone(deep = TRUE), type = "roc")
```


```{R}
library(mlr3viz)
library(precrec)
# ROC analysis for the tuned random forest (248 trees, max depth 16), resampled with cv5
tasks <- credit_task
resampling <- cv5
learner <- lrn(
  "classif.ranger",
  predict_type = "prob",
  num.trees = 248,
  max.depth = 16
)

object <- benchmark(
  benchmark_grid(tasks = tasks, learners = learner, resamplings = resampling)
)

head(fortify(object))
autoplot(object)

# Draw the ROC curve from a deep copy of the benchmark result
autoplot(object$clone(deep = TRUE), type = "roc")
```





































```{R}
# Plot of the complexity-parameter (cp) table for the classification tree
# xval = 10 is passed through to rpart so that each fitted tree carries the
# cross-validation results that plotcp() draws.
lrn_cart_cv <- lrn("classif.rpart", predict_type = "prob",xval=10)
# NOTE: this overwrites cv5 with a bootstrap resampling for the rest of the notebook
cv5 <- rsmp("bootstrap")
# store_models = TRUE keeps the fitted learners so the rpart model can be extracted below
res_cart_cv <- resample(credit_task, lrn_cart_cv, cv5, store_models = TRUE)


# Plot the cp table of the tree fitted in the 10th resampling iteration
rpart::plotcp(res_cart_cv$learners[[10]]$model)
```



```{R}
lrn_ranger$param_set
```

```{R}

# Tuning the number of trees and the maximum depth of the random forest
learner <- lrn("classif.ranger", predict_type = "prob")


# Integer search ranges for the two tuned hyperparameters
search_space = ps(
  num.trees = p_int(lower = 200, upper = 500),
  max.depth = p_int(lower = 2, upper = 30)
  
)



# Each configuration is scored by accuracy on a holdout split
hout <- rsmp("holdout")
measure = msr("classif.acc")


# NOTE: despite the name evals20, tuning stops after 10 evaluations
evals20 = trm("evals", n_evals = 10)

task<-TaskClassif$new(id = "telee",
                               backend = na.omit(tele), # rows containing any NA are dropped before the task is built
                               target = "Churn",
                               positive = "Yes")

instance = TuningInstanceSingleCrit$new(
  task = task,
  learner = learner,
  resampling = hout,
  measure = measure,
  search_space = search_space,
  terminator = evals20
)
instance


# NOTE(review): resolution = 250 defines a very fine grid, but the terminator
# above stops after 10 evaluations, so only a handful of grid points are ever
# tried - consider raising n_evals or using a coarser grid / random search.
tuner = tnr("grid_search", resolution = 250)
tuner$optimize(instance)
``` 

```{R}
lrn_cart$param_set
```

```{R}

# Tuning the classification tree's complexity parameter (cp), i.e. the cost-complexity penalty

learner <- lrn("classif.rpart", predict_type = "prob")



# Search range for cp
search_space = ps(
  cp = p_dbl(lower = 0.0001, upper = 0.1)
)

#tele = tele %>% mutate_if(sapply(tele, is.character), as.factor)

# Each configuration is scored by accuracy on a holdout split
hout <- rsmp("holdout")
measure = msr("classif.acc")


# NOTE: despite the name evals20, tuning stops after 10 evaluations
evals20 = trm("evals", n_evals = 10)

task<-TaskClassif$new(id = "telee",
                               backend = tele, # tele is passed as-is here (no na.omit())
                               target = "Churn",
                               positive = "Yes")



instance = TuningInstanceSingleCrit$new(
  task = task,
  learner = learner,
  resampling = hout,
  measure = measure,
  search_space = search_space,
  terminator = evals20
)
instance


# NOTE(review): the grid has 250 cp values but the terminator above stops after
# 10 evaluations, so only a handful of them are ever tried - consider raising
# n_evals or lowering the resolution.
tuner = tnr("grid_search", resolution = 250)
tuner$optimize(instance)


```

```{R}

```


```{R}


# Optimising the classification threshold of the tree for the false negative rate
# (the AutoTuner below tunes msr("classif.fnr")).
# NOTE(review): this heading originally said "false positive rate" - confirm which was intended.



# Tree followed by a threshold step, wrapped as a single learner
gr = lrn("classif.rpart", predict_type = "prob") %>>% po("threshold")
learner = GraphLearner$new(gr)


# Only the threshold is tuned, between 0.36 and 0.64
search_space = ps(
  threshold.thresholds = p_dbl(lower = 0.36, upper = 0.64)
)


terminator = trm("evals", n_evals = 10)
tuner = tnr("grid_search")

# The AutoTuner tunes the threshold on an inner holdout split
at = AutoTuner$new(
  learner = learner,
  resampling = rsmp("holdout"),
  measure = msr("classif.fnr"),
  search_space = search_space,
  terminator = terminator,
  tuner = tuner
)
at

# Compare the threshold-tuned tree with a plain tree using 3-fold CV.
# `task` is not defined in this chunk; it is the object created by an earlier chunk.
grid = benchmark_grid(
  task = task,
  learner = list(at, lrn("classif.rpart")),
  resampling = rsmp("cv", folds = 3)
)

# avoid console output from mlr3tuning
logger = lgr::get_logger("bbotk")
logger$set_threshold("warn")

bmr = benchmark(grid)
# Error and rates, followed by the raw confusion-matrix counts
bmr$aggregate(msrs(c("classif.ce",
                   "classif.fpr",
                   "classif.fnr",
                   "classif.fn",
                   "classif.fp",
                   "classif.tp",
                   "classif.tn")))




```



```{R}
# Tune the classification threshold of a probability-predicting random forest
# (ranger) so as to minimise the false negative rate (classif.fnr), then
# benchmark the tuned learner against a plain ranger forest.

# Rebuild the task on complete cases only: na.omit() drops every row of `tele`
# that contains a missing value (presumably because ranger does not accept
# missing values - confirm).
task <- TaskClassif$new(
  id = "telee",
  backend = na.omit(tele),
  target = "Churn",
  positive = "Yes"
)

# ranger predicting probabilities, piped into a thresholding step.
gr <- lrn("classif.ranger", predict_type = "prob") %>>% po("threshold")
learner <- GraphLearner$new(gr)

# Thresholds to try: anywhere in [0.35, 0.65].
search_space <- ps(
  threshold.thresholds = p_dbl(lower = 0.35, upper = 0.65)
)

# Stop after 10 evaluated thresholds.
terminator <- trm("evals", n_evals = 10)
tuner <- tnr("grid_search")

# The threshold is tuned on an inner holdout split, minimising the false
# NEGATIVE rate.
at <- AutoTuner$new(
  learner = learner,
  resampling = rsmp("holdout"),
  measure = msr("classif.fnr"),
  search_space = search_space,
  terminator = terminator,
  tuner = tuner
)
at

# Compare the auto-tuned learner with an untuned forest on one holdout split.
# Argument names are spelled out in full (tasks / learners / resamplings)
# rather than relying on R's partial argument matching.
grid <- benchmark_grid(
  tasks = task,
  learners = list(at, lrn("classif.ranger")),
  resamplings = rsmp("holdout")
)

# avoid console output from mlr3tuning
logger <- lgr::get_logger("bbotk")
logger$set_threshold("warn")

bmr <- benchmark(grid)

# Aggregated error, error rates and raw confusion-matrix counts per learner.
bmr$aggregate(msrs(c(
  "classif.ce",
  "classif.fpr",
  "classif.fnr",
  "classif.fn",
  "classif.fp",
  "classif.tp",
  "classif.tn"
)))



```
```{R}

# Fit a classification tree with 10-fold cross-validation, keep the fitted
# models, and plot the tree obtained in the first fold.

lrn_cart <- lrn("classif.rpart", predict_type = "prob")

# Task on the full `tele` data (no na.omit() here).
credit_task <- TaskClassif$new(
  id = "telee",
  backend = tele,
  target = "Churn",
  positive = "Yes"
)

# Fix the folds up front. Note that the object is named `cv5` but is
# configured with 10 folds.
cv5 <- rsmp("cv", folds = 10)
cv5$instantiate(credit_task)

# One-row benchmark design (task x learner x resampling); store_models = TRUE
# keeps the fitted model of every fold so it can be inspected below.
res <- benchmark(
  data.table(
    task       = list(credit_task),
    learner    = list(lrn_cart),
    resampling = list(cv5)
  ),
  store_models = TRUE
)

# Cross-validated classification error.
res$aggregate(list(msr("classif.ce")))

# The benchmark holds a single resample result: take it, pick the learner
# fitted in the first CV iteration, and extract its underlying model object.
trees <- res$resample_result(1)
tree1 <- trees$learners[[1]]
tree1_rpart <- tree1$model

# Draw the tree skeleton, then add the split labels and per-node counts.
plot(tree1_rpart, compress = TRUE)
text(tree1_rpart, use.n = TRUE)
```




```{R}
library(mlr)
library(randomForest)

# Learning curves (legacy mlr): accuracy of a probability-predicting tree and
# a default random forest as the share of training data grows from 10% to 100%.
lrns <- list(
  makeLearner("classif.rpart", predict.type = "prob"),
  "classif.randomForest"
)

# Classification task on the complete cases of `tele`, with Churn as target
# (the object keeps the name `regr.task` although it is a classification task).
regr.task <- makeClassifTask(
  id = "telee",
  data = na.omit(tele),
  target = "Churn"
)

# Evaluate each training-set fraction on a holdout split.
rin <- makeResampleDesc(method = "Holdout")
lc <- generateLearningCurveData(
  learners = lrns,
  task = regr.task,
  percs = seq(0.1, 1, by = 0.1),
  measures = acc,
  resampling = rin,
  show.info = FALSE
)
plotLearningCurve(lc)
```
```{R}
# Learning curves showing accuracy on both the test and the training data,
# for a tree and a deliberately small random forest (2 trees, mtry = 1,
# nodesize = 2). Uses `regr.task` from the previous chunk.
lrns <- list(
  makeLearner("classif.rpart", predict.type = "prob"),
  makeLearner("classif.randomForest", ntree = 2, mtry = 1, nodesize = 2)
)

# predict = "both" makes the holdout resampling also predict on the training
# set, which the train.mean aggregation below needs.
rin2 <- makeResampleDesc(method = "Holdout", predict = "both")
lc2 <- generateLearningCurveData(
  learners = lrns,
  task = regr.task,
  percs = seq(0.1, 1, by = 0.1),
  measures = list(acc, setAggregation(acc, train.mean)),
  resampling = rin2,
  show.info = FALSE
)

# One panel per learner.
plotLearningCurve(lc2, facet = "learner")
```



